********************************************************************************
*PROJECT: Legislature Integration and Bipartisanship: A Natural Experiment in Iceland							   
*PURPOSE: Prep voting data										   										   
********************************************************************************

// ---------------------------------------------------------------------------
// Session setup: clean slate, folder locals, tempfiles, run timer.
// ---------------------------------------------------------------------------
clear all
// `set mem` was removed here: memory is managed automatically from Stata 12
// onwards and this do-file already needs Stata 13+ (import delimited).
set maxvar 32000
set more off

// Every path below hangs off the global path_pch. If it is undefined the
// locals would silently resolve to root-relative paths and the script would
// die later with an unhelpful "file not found", so stop here instead.
if `"$path_pch"' == "" {
	display as error "global macro path_pch is not defined; set it before running this do-file"
	exit 198
}

// Forward slashes are used throughout: Stata accepts them on every platform,
// whereas backslashes only work on Windows and act as an escape character
// when they directly precede a macro reference.
local filefolder "$path_pch/Data/raw/Voting"
local rawmpfolder "$path_pch/Data/raw/MP"
local intmpfolder "$path_pch/Data/intermediate/MP"
local cosponfolder "$path_pch/Data/raw/Co-Sponsorship"
local seatingfolder "$path_pch/Data/intermediate/Seating"
local votingfolder "$path_pch/Data/raw/Voting"
local votingintfolder "$path_pch/Data/intermediate/Voting"
local votinganalyfolder "$path_pch/Data/analysis/Voting"
local tablefolder "$path_pch/Output/Tables"
local graphfolder "$path_pch/Output/Graphs"

// Highest randomisation-inference draw index to process; the loops further
// down run over RI0..RI`maxdraw'.
local maxdraw = 0

tempfile MP mpname IDs leaders leaderssaia leadervote neighvote
timer on 1


*********************************************************************
// Bring MP data and prep
*********************************************************************

// Seating data: one record per MP and session, with the MP's own attributes
// and the neighbour variables (?_*RI#) for draws RI0-RI2.
// Path separator is a forward slash so the file also opens on macOS/Linux.
use "`seatingfolder'/seating_MP_frontback.dta", clear
keep MP_name MP_id session_id party party_id strata gender coalition ///
	seat_initRI0 seat_initRI1 seat_initRI2 ?_*RI0 ?_*RI1 ?_*RI2
save `MP'

// Copy with a combined session/MP key and the name under a different
// variable name; only the (currently disabled) fuzzy-matching code uses it.
gen idusing = session_id * 10000 + MP_id
ren MP_name mpname
save `mpname'

// Name -> MP_id lookup built from the biography file.
use "`intmpfolder'/MP_bio.dta", clear
keep MP_name MP_id
duplicates drop
// A name that still appears more than once maps to several MP_ids and cannot
// be resolved by name alone, so every such name is dropped entirely.
duplicates tag MP_name, g(dup)
drop if dup>0
drop dup
isid MP_name
save `IDs'


******************************************************
// bring vote data
// scraped from: http://www.althingi.is/thingstorf/thingmalin/atkvaedagreidsla/?nnafnak=51983
******************************************************

// Vote-level information: which session and bill each vote belongs to, and
// when it took place. The CSV columns arrive as v1..v5.
cd "`filefolder'"
import delimited using vote_info.csv, clear
rename (v1 v2 v3 v4 v5) (vote_id session_id session_id_alt bill_id datetime)

// The two session columns must agree; keep one and discard votes without a
// session.
assert session_id == session_id_alt
drop session_id_alt
drop if missing(session_id)

// Votes are in chronological order, so the position of a vote within its
// session (ordered by vote_id) tells us whether it falls in the first half.
bysort session_id (vote_id): generate first_half = _n <= _N/2

tempfile session
save `session'

// Individual voting records: one row per MP per vote.
cd "`filefolder'"
import delimited using vote_ind.csv, clear encoding("utf-8")
rename (vote name) (vote_raw MP_name)

// Collapse the raw Icelandic labels into four categories; the two absence
// labels are pooled, and any label not listed here is left empty.
generate vote = cond(inlist(vote_raw,"fjarverandi","fjarvist"), "absent", ///
	cond(vote_raw == "greiðir ekki atkvæði", "abstain", ///
	cond(vote_raw == "nei", "no", ///
	cond(vote_raw == "já", "yes", ""))))

// vote_id == 40426 was collected twice (63 exact duplicate rows expected).
duplicates drop

// A few MPs appear under an abbreviation; restore their full names.
replace MP_name = "Guðni Ágústsson"        if MP_name == "GÁ"
replace MP_name = "Sighvatur Björgvinsson" if MP_name == "SighB"
replace MP_name = "Ísólfur Gylfi Pálmason" if MP_name == "ÍGP"

// Attach the session information; every vote must be matched, and every
// vote_info row must be used.
merge m:1 vote_id using `session', assert(match) nogenerate

// Build a lookup of every identifiable MP's vote on every vote_id. It is
// keyed by party_leader_id so it can later be merged on as the party
// leader's vote.
preserve
	// keep only records whose name resolves to exactly one MP_id
	merge m:1 MP_name using `IDs', keep(match) nogenerate
	keep MP_id vote_id vote_raw vote
	rename (MP_id vote_raw vote) (party_leader_id leader_vote_raw leader_vote)
	isid party_leader_id vote_id
	save `leadervote'
restore
	
/*
// fuzzy matching for diagnosis: takes forever. Given up.
egen idmaster = group(session_id MP_name)
capture noisily {
	matchit idmaster MP_name using `mpname', idusing(idusing) txtusing(mpname) gen(similscore) override
	beep
}
if (_rc) {
	beep
	sleep 2000
	beep
	sleep 2000
	beep
}
*/

// Attach MP attributes and seating neighbours by name and session. Only
// matched records survive: votes by MPs missing from the seating data and
// seating rows without votes are both discarded.
merge m:1 MP_name session_id using `MP', keep(match) nogenerate


*********************************************************************
// Import and clean party leader data
*********************************************************************

// Build a daily calendar of party leaders: one row per party and date giving
// the MP_id of the leader on that date. Saved to the `leaders' tempfile; the
// main voting data is preserved and restored around it.
preserve
	import excel using "`rawmpfolder'/PartyLeaders1991to2018.xlsx", clear first
	keep party_id party_leader_id scode ecode
	// Drop rows without a usable leadership spell. Per the original note there
	// are no party leaders for Women's Alliance (5), Independent (6),
	// Parliamentary Party of Independents (10) and Citizens' Movement (13).
	drop if scode=="" | party_id==. | party_leader_id==.
	// scode/ecode are parsed positionally: day = chars 1-2, month = chars 4-5,
	// year = chars 7-10 (i.e. a DD?MM?YYYY string). They become sdate/edate.
	foreach x in s e {
		g `x'date = mdy(real(substr(`x'code,4,2)),real(substr(`x'code,1,2)),real(substr(`x'code,7,4)))
	}
	format ?date %d
	drop ?code
	// Turn each spell into two rows: one dated at the start of the spell and
	// one (the copy) dated at its end.
	expand 2, g(copy)
	g votedate = sdate
	replace votedate = edate if copy==1
	drop copy
	format votedate %d
	sort party_id votedate
	drop ?date
	// Treat every party/leader pair as a panel and let tsfill create a row for
	// each date between its earliest and latest endpoint.
	egen partyXleader = group(party_id party_leader_id)
	tsset partyXleader votedate
	tsfill
	// The rows created by tsfill have missing party_id/party_leader_id; carry
	// the values forward within each panel.
	foreach var of varlist party_id party_leader_id {
		sort partyXleader votedate
		replace `var' = `var'[_n-1] if `var'==. & `var'[_n-1]!=. & partyXleader[_n-1]==partyXleader
	}
	drop partyXleader
	
	// fix issue where party leader had two separate periods of leadership:
	// the panel is per party/leader pair, so tsfill also filled the dates
	// between the two spells; remove those in-between dates for the two
	// affected leaders
	drop if party_id==14 & party_leader_id==713 & votedate>=mdy(10,1,2011) & votedate<=mdy(9,10,2012)
	drop if party_id==19 & party_leader_id==728 & votedate>=mdy(10,1,2013) & votedate<=mdy(9,8,2014)
	
	// check each party only has one leader on a given date
	isid party_id votedate
	
	// temp save
	save `leaders'
restore
	

*********************************************************************
// Get Saia's party leader vote data
*********************************************************************

// Prepare Saia's chairman-vote file so it can be merged on session, vote and
// party: harmonise the key names and translate party names into party_id.
preserve
	use "`votingfolder'/voting_procedure_with_chairman_vote_and_senior_MP_saia", clear
	drop _merge_chairman
	rename (session voting_procedure_id MP_party) (session_id vote_id party)
	// (disabled) restriction to regular sessions up to 145:
	// keep if session_id<=145 & (!inlist(session_id,119,124,129,134,137,142))

	// party name -> party_id, listed in id order; names not listed stay missing
	generate party_id = .
	replace party_id = 1  if party == "Alþýðuflokkur"
	replace party_id = 2  if party == "Alþýðubandalag"
	replace party_id = 3  if party == "Framsóknarflokkur"
	replace party_id = 4  if party == "Sjálfstæðisflokkur"
	replace party_id = 5  if party == "Samtök um kvennalista"
	replace party_id = 6  if party == "Utan þingflokka"
	replace party_id = 7  if party == "Þjóðvaki - hreyfing fólksins"
	replace party_id = 8  if party == "Þingflokkur jafnaðarmanna"
	replace party_id = 9  if party == "Samfylkingin"
	replace party_id = 10 if party == "Þingflokkur óháðra"
	replace party_id = 11 if party == "Frjálslyndi flokkurinn"
	replace party_id = 12 if party == "Vinstri hreyfingin - grænt framboð"
	replace party_id = 13 if party == "Borgarahreyfingin"
	replace party_id = 14 if party == "Hreyfingin"
	replace party_id = 15 if party == "Björt framtíð"
	replace party_id = 16 if party == "Viðreisn"
	replace party_id = 19 if party == "Píratar"

	// keep Saia's party label under its own name so it cannot clash with ours
	rename party party_saia
	save `leaderssaia'
restore


*********************************************************************
// Merge on party leader data
*********************************************************************

// first get date variable
// datetime is parsed positionally: day = chars 1-2, month = chars 4-5,
// year = chars 7-10.
g voteyear = substr(datetime,7,4)
g votemonth = substr(datetime,4,2)
g voteday = substr(datetime,1,2)
destring voteyear votemonth voteday, replace
g votedate = mdy(votemonth, voteday, voteyear)
assert voteday!=. & votemonth!=. & voteyear!=. & inrange(voteday,1,31) ///
	& inrange(votemonth,1,12) & inrange(voteyear,1991,2018)
// The range checks above still let impossible calendar dates through (e.g.
// 31 April), for which mdy() returns missing. A missing votedate can never
// match the leader calendar, and for the leaderless parties the merge check
// below would not notice, so verify the composed date explicitly.
assert !mi(votedate)
format votedate %d

// first merge identity of party leader
// (leader-calendar rows with no vote on that date are not kept)
merge m:1 party_id votedate using `leaders', keep(1 3)
assert _merge==3 if !inlist(party_id,5,6,10,13) // these parties don't have party leaders
drop _merge
la var party_leader_id "MP_id of party leader, missing for parties without leaders (IDs 5,6,10,13)"

// Attach the vote cast by the party leader on the same vote_id; votes with
// no leader record are kept with the leader variables missing.
merge m:1 party_leader_id vote_id using `leadervote', keep(master match)

// Sanity check: no leader may have every single vote unmatched (mean of
// _merge equal to 1). The only exception is Jón Sigurðsson
// (https://www.althingi.is/altext/cv/is/?nfaerslunr=1123), who did not vote
// while leader (think because he didn't have a seat).
bysort party_leader_id: egen avg_merge = mean(_merge)
assert avg_merge != 1 if party_leader_id != . & party_leader_id != 1123
drop avg_merge _merge

// The remaining gaps in leader votes arise when a Deputy MP voted instead.
// Example, session 1998/99: Asgrimsson (ID=8) has votes for vote_id
// 20014-20230, none for 20233-20554, votes again for 20555-21077, none for
// 21079-21597, then votes for 21600-21602. Deputy MP Jónas Hallgrímsson
// voted on vote_id 20554 but not on 20555.
label variable leader_vote_raw "raw vote of party leader, if have party leader and leader's vote recorded"
label variable leader_vote "vote (absent categories joined) of party leader, if leader's vote recorded"


*********************************************************************
// Merge on chairperson (of parliamentary party, I think) data from Saia
*********************************************************************

// Attach Saia's chairman vote. Some vote_ids exist only in our data and some
// only in his; rows found only in his file are not kept, and the match
// status is stored in _chairmanmerge.
merge m:1 session_id vote_id party_id using `leaderssaia', keep(master match) generate(_chairmanmerge)
// align Saia's label for abstentions with ours
replace chairman_vote = "abstain" if chairman_vote == "abstained"

// Key outcome: the MP voted differently from the party leader. Defined only
// when both votes are known and the MP is not the leader.
generate non_compliance = (vote != leader_vote) if MP_id != party_leader_id & leader_vote != "" & vote != ""
label variable non_compliance "voted differently to party leader, missing if party leader"

// Same outcome measured against Saia's chairman vote; empty and "unknown"
// chairman votes leave it missing.
generate non_compliance_saia = (vote != chairman_vote) if !inlist(chairman_vote,"","unknown") & vote != ""
label variable non_compliance_saia "voted differently to chairperson of parliamentary party(?) (as in Saia)"


*********************************************************************
// Get modal party votes as alternative measure of party line
*********************************************************************

// Party line measured as the most common vote within party on each vote_id.
bysort vote_id party_id: egen mode_party_vote = mode(vote)
generate non_compliance_alt = (vote != mode_party_vote) if mode_party_vote != "" & vote != ""
label variable non_compliance_alt "voted differently to modal party vote"


// Store each MP's vote variables under an N_ prefix so they can be merged
// back on via the MP_id of a seating neighbour.
preserve
	keep MP_id vote_id vote_raw vote mode_party_vote chairman_vote
	rename (MP_id vote_raw vote mode_party_vote chairman_vote) ///
		(N_MP_id N_vote_raw N_vote N_mode_party_vote N_chairman_vote)
	save `neighvote'
restore


// For every draw and every neighbour position (U, L, F, B), look up the
// neighbour's votes on the same vote_id and store them with the position
// prefix and draw suffix.
forvalues draw = 0/`maxdraw' {
	foreach pos in U L F B {
		// temporarily give the neighbour id the key name used in `neighvote'
		rename `pos'_MP_idRI`draw' N_MP_id
		merge m:1 N_MP_id vote_id using `neighvote', keep(master match) nogenerate
		// restore the id name and tag the merged-in variables
		rename (N_MP_id N_vote_raw N_vote N_mode_party_vote N_chairman_vote) ///
			(`pos'_MP_idRI`draw' `pos'_vote_rawRI`draw' `pos'_voteRI`draw' ///
			`pos'_mode_party_voteRI`draw' `pos'_chairman_voteRI`draw')
	}
}


*********************************************************************
// Get key regressor and IV
*********************************************************************

// divergent peers treatment/IV and alternative
// For each draw j and each party-line measure x, four variables are built:
//   divergent_peers<suf>RI<j>       share of neighbours (U, L, F, B) whose own vote differs from x
//   divergent_peersLR<suf>RI<j>     same, using only the U and L neighbours
//   divergent_peers<suf>_ivRI<j>    share of neighbours whose own value of x differs from the focal MP's x
//   divergent_peersLR<suf>_ivRI<j>  same, using only the U and L neighbours
// The suffix is empty for x = chairman_vote and "_alt" for x = mode_party_vote.
// NOTE(review): non_compliance_saia treats chairman_vote=="unknown" as missing,
// but here only an empty x is excluded, so an "unknown" chairman vote would
// count every voting neighbour as divergent - confirm this is intended.
forv j=0(1)`maxdraw' {
	// i tracks the position in the list of measures and selects the suffix
	local i=1
	foreach x in chairman_vote mode_party_vote {
		if `i'==1 {
			local suf = ""
		}
		else if `i'==2 {
			local suf = "_alt"
		}
		
		// treatment: all (up to) 4 neighbours
		// denom = number of neighbours with a recorded vote; a denom of 0 makes
		// the shares below missing
		g denom = (U_voteRI`j'!="") + (L_voteRI`j'!="") + (F_voteRI`j'!="") + (B_voteRI`j'!="") if `x'!=""
		g divergent_peers`suf'RI`j' = (U_voteRI`j'!=`x' & U_voteRI`j'!="") + (L_voteRI`j'!=`x' & L_voteRI`j'!="") + ///
										(F_voteRI`j'!=`x' & F_voteRI`j'!="") + (B_voteRI`j'!=`x' & B_voteRI`j'!="") if `x'!=""
		replace divergent_peers`suf'RI`j' = divergent_peers`suf'RI`j'/denom
		
		// treatment: L-R neighbours only
		// average over both when both voted, otherwise the indicator of the one that did
		g divergent_peersLR`suf'RI`j' = ((U_voteRI`j'!=`x') + (L_voteRI`j'!=`x'))/2 if U_voteRI`j'!="" & L_voteRI`j'!="" & `x'!=""
		replace divergent_peersLR`suf'RI`j' = U_voteRI`j'!=`x' if U_voteRI`j'!="" & L_voteRI`j'=="" & `x'!=""
		replace divergent_peersLR`suf'RI`j' = L_voteRI`j'!=`x' if L_voteRI`j'!="" & U_voteRI`j'=="" & `x'!=""
		
		// IV: all (up to) 4 neighbours
		// NOTE(review): the numerator counts neighbours with a non-empty `x', but it
		// is divided by denom, which counts neighbours with a non-empty vote; the
		// two counts can differ - confirm the shared denominator is intended.
		g divergent_peers`suf'_ivRI`j' = (U_`x'RI`j'!=`x' & U_`x'RI`j'!="") + (L_`x'RI`j'!=`x' & L_`x'RI`j'!="") + ///
											(F_`x'RI`j'!=`x' & F_`x'RI`j'!="") + (B_`x'RI`j'!=`x' & B_`x'RI`j'!="") if `x'!=""
		replace divergent_peers`suf'_ivRI`j' = divergent_peers`suf'_ivRI`j'/denom
		drop denom
		
		// IV: L-R neighbours only
		g divergent_peersLR`suf'_ivRI`j' = ((U_`x'RI`j'!=`x') + (L_`x'RI`j'!=`x'))/2 if U_`x'RI`j'!="" & L_`x'RI`j'!="" & `x'!=""
		replace divergent_peersLR`suf'_ivRI`j' = U_`x'RI`j'!=`x' if U_`x'RI`j'!="" & L_`x'RI`j'=="" & `x'!=""
		replace divergent_peersLR`suf'_ivRI`j' = L_`x'RI`j'!=`x' if L_`x'RI`j'!="" & U_`x'RI`j'=="" & `x'!=""	
		
		local i=`i'+1
	}
}


// Alternative treatment: share of the U and L neighbours that belong to a
// different party. Averaged over the neighbours whose party is known;
// missing when neither is known or when the MP's own party is missing.
forvalues d = 0/`maxdraw' {
	tempvar seenU seenL
	generate byte `seenU' = U_party_idRI`d' != .
	generate byte `seenL' = L_party_idRI`d' != .
	generate other_party_LRneighRI`d' = ///
		((U_party_idRI`d' != party_id) * `seenU' + (L_party_idRI`d' != party_id) * `seenL') ///
		/ (`seenU' + `seenL') if party_id != . & (`seenU' | `seenL')
	drop `seenU' `seenL'
}


// Averages within MP and session.

// outcomes
foreach sfx in "_saia" "_alt" {
	bysort MP_id session_id: egen non_compliance`sfx'_mean = mean(non_compliance`sfx')
}

// treatments and IV, for each party-line measure and each draw
foreach sfx in "" "_alt" {
	forvalues d = 0/`maxdraw' {
		foreach stem in divergent_peers`sfx'RI`d' divergent_peers`sfx'_ivRI`d' divergent_peersLR`sfx'RI`d' {
			bysort MP_id session_id: egen `stem'_mean = mean(`stem')
		}
	}
}


// Share of seating neighbours who belong to a different party than the focal
// legislator. Counts are accumulated over the four neighbour positions.
forvalues d = 0/`maxdraw' {
	generate num_peersRI`d' = 0
	generate num_othparty_peersRI`d' = 0
	foreach pos in U L F B {
		// a neighbour exists at this position
		replace num_peersRI`d' = num_peersRI`d' + (`pos'_MP_idRI`d' != .)
		// ... and sits with another party (own party must be known)
		replace num_othparty_peersRI`d' = num_othparty_peersRI`d' + ///
			(`pos'_party_idRI`d' != party_id & `pos'_MP_idRI`d' != . & party_id != .)
	}
	generate prop_othparty_peersRI`d' = num_othparty_peersRI`d'/num_peersRI`d'
	// must be a valid share; this also fails if anyone has no neighbour at all
	assert inrange(prop_othparty_peersRI`d', 0, 1)
}

// Group identifiers for fixed effects
egen MPXSession           = group(MP_id session_id)
egen PartyXSession        = group(party_id session_id)
egen StrataXSessionXParty = group(party_id session_id strata)
egen PartyXVote           = group(party_id vote_id)

// One indicator per chairman vote category, missing when no chairman vote
foreach choice in yes no abstain absent {
	generate chair_`choice' = (chairman_vote == "`choice'") if chairman_vote != ""
}

// Votes cast from February 1991 to February 2017 inclusive
generate saia_period = inrange(voteyear,1992,2016) ///
	| (voteyear==1991 & votemonth>=2) ///
	| (voteyear==2017 & votemonth<=2)

// Session flags
generate specialSession = inlist(session_id,119,124,129,134,137,142)
generate shortSession = (session_id == 147)

// Write the analysis file
save "`votinganalyfolder'/voting_analysis", replace

// Report run time and signal completion
timer off 1
timer list 1
beep
